Project 1: ACS Quality of Life per State, Education and Gender

We want to find a relationship between Attained Education, State, Gender, and the quality of life of the US population.

The main assumption is that quality of life is directly proportional to Salary [USD/hour] and Commute Time to work [hours].

Load the libraries and data

library(dplyr)
library(readr)
library(DT)
library(plotly)
library(ggplot2)
library(RColorBrewer)
library(d3heatmap)
# Switch between rebuilding the dataset from the raw files (TRUE) and
# skipping every raw-data step below (FALSE).
load.data <- FALSE

# Preview the first 50 rows of raw part "a" in a horizontally scrollable
# table, 10 rows per page.
if (load.data) {
  acs14pusa %>%
    head(50) %>%
    datatable(options = list(scrollX = TRUE, pageLength = 10))
}

Basic information about original data

# Report the dimensions (rows, columns) of both raw parts.
if (load.data) {
  # Only the value of the last expression in a braced block is auto-printed,
  # so each result is printed explicitly; otherwise the dimensions of part
  # "a" would be silently discarded.
  print(dim(acs14pusa))
  print(dim(acs14pusb))
}

Select relevant columns to reduce memory usage.

# Keep only the columns used by the analysis, then release the full raw tables.
if (load.data) {
  relevant.columns <- c(
    "SERIALNO", "ST", "SEX", "AGEP", "SCHL", "INDP",
    "WKHP", "WAGP", "ESR", "PINCP", "PERNP", "JWMNP"
  )

  # Column positions are looked up in each table separately; the original
  # column order of each table is preserved.
  acs14pusa.cols <- acs14pusa[, which(colnames(acs14pusa) %in% relevant.columns)]
  acs14pusb.cols <- acs14pusb[, which(colnames(acs14pusb) %in% relevant.columns)]

  # Drop the full tables and run the garbage collector.
  rm(list = c("acs14pusa", "acs14pusb"))
  gc()
}

Bind part a and b, and get basic information

# Stack the reduced part "a" and part "b" tables into a single table and
# show the dimensions of the result.
if(load.data){
  # Rows of part "b" are appended below the rows of part "a".
  acs14pus <- rbind(acs14pusa.cols, acs14pusb.cols)
  # Last expression of the block, so its value is what gets displayed.
  dim(acs14pus)
}

Add state names and abbreviations

Credits to Arnold Chua Lau (Spring 2016).

# Attach state abbreviations and names to every record.
if (load.data) {
  # Lookup table of states; `abbr` and `name` are duplicated under the
  # ST-prefixed names used by the rest of the analysis.
  ST.anno <- read_csv("./data/statenames.csv") %>%
    mutate(STabbr = abbr, STname = name)

  # Numeric copy of the state code, matched against the lookup's `code` column.
  acs14pus <- acs14pus %>%
    mutate(STnum = as.numeric(ST)) %>%
    left_join(ST.anno, by = c("STnum" = "code"))

  # Spot-check: the ST* columns of five randomly sampled rows.
  acs14pus %>%
    sample_n(5) %>%
    select(starts_with("ST"))
}

Convert data

# Coerce the analysis columns to their working types and derive WAGEHOUR
# as WAGP divided by WKHP and by 52.
acs14pus <- acs14pus %>%
  mutate(
    JWMNP = as.numeric(JWMNP),
    WAGP = as.numeric(WAGP),
    WKHP = as.numeric(WKHP),
    STabbr = as.factor(STabbr),
    SCHL = as.integer(SCHL),
    # Uses the already-converted WAGP and WKHP from the lines above.
    WAGEHOUR = WAGP / WKHP / 52
  )

# Lookup table of industry codes.
industry.categories <- read_csv("./data/industry_codes.csv")
Parsed with column specification:
cols(
  code = col_integer(),
  industry = col_character()
)
# Lookup table of education codes (columns: code, education).
education.categories <- read_csv("./data/education_codes.csv")
Parsed with column specification:
cols(
  code = col_integer(),
  education = col_character()
)
# Attach the education label for each SCHL code.
# The coercion keeps SCHL the same type as the integer `code` key; it is a
# no-op when SCHL is already integer.
acs14pus$SCHL <- as.integer(acs14pus$SCHL)
# Drop any existing `education` column before joining. If the table already
# carries one (presumably the case when it was reloaded from the cached CSV,
# which is written after this join -- confirm against the loading step), the
# join would otherwise produce `education.x` / `education.y` and leave no
# `education` column for the plots. Assigning NULL is a no-op when the column
# is absent.
acs14pus$education <- NULL
acs14pus <- left_join(acs14pus, education.categories, by = c("SCHL" = "code"))

Write the result to a CSV file so that we do not need to build it again (takes about 30 minutes on a laptop)

# Cache the assembled table so later runs can skip the raw-data steps.
if (load.data) {
  # Make sure the destination folder exists; no warning if it already does.
  dir.create("./output", showWarnings = FALSE, recursive = TRUE)
  # The destination is passed positionally because the `path` argument name
  # is deprecated in newer readr releases in favour of `file`; the
  # positional form works with both.
  write_csv(acs14pus, "./output/ss14pus_columns.csv")
}

Summary statistics (Jaime)

Summary Statistics

# Per-state means of commute time, wage, weekly hours and hourly wage,
# ignoring missing values and rounded to one decimal place.
summary.mean <- acs14pus %>%
  group_by(STabbr) %>%
  summarise(
    JWMNP = round(mean(JWMNP, na.rm = TRUE), 1),
    WAGP = round(mean(WAGP, na.rm = TRUE), 1),
    WKHP = round(mean(WKHP, na.rm = TRUE), 1),
    WAGEHOUR = round(mean(WAGEHOUR, na.rm = TRUE), 1)
  )

# Show every state on a single page of the table.
datatable(
  summary.mean,
  options = list(scrollX = TRUE, pageLength = nrow(summary.mean))
)

BoxPlot - Commute Time

BoxPlot - Wage per Hour (Jaime)

JG: I am not convinced of these data values. DC is far too high!

Heatmap

Once people in the US population hold a bachelor’s degree, they tend to commute longer than those without a higher-level degree

# Box plot of commute time (JWMNP) for each education label.

# Sort the records by education code so that labels are encountered in
# code order.
acs14pus <- acs14pus[order(acs14pus$SCHL), ]

# Education labels in code order (first appearance after the sort), without NA.
education.order <- unique(acs14pus$education)
education.order <- education.order[!is.na(education.order)]

plot_ly(
  x = acs14pus$education,
  y = acs14pus$JWMNP,
  type = "box"
) %>%
  layout(
    title = "Commute time by Education Attainment",
    xaxis = list(
      title = "Education",
      # Fix the category order on the axis itself. The previous
      # `sort = FALSE` is not a box-trace attribute, and `legend` expects a
      # list of legend settings rather than a vector of labels, so neither
      # controlled the ordering.
      categoryorder = "array",
      categoryarray = education.order
    ),
    yaxis = list(title = "Commute time")
  )
# Heatmap of the JWMNP column of summary.median.2, with education on the
# x axis and industry on the y axis.
layout(
  plot_ly(
    x = summary.median.2$education,
    y = summary.median.2$industry,
    z = summary.median.2$JWMNP,
    type = "heatmap",
    colorscale = "Hot"
  ),
  title = "Commute time by Education Attainment and Industry",
  xaxis = list(title = "Education"),
  yaxis = list(title = "Industry"),
  width = 1000,
  height = 700
)

Choropleth maps of US states (Ying)

DQoNCi0tLQ0KdGl0bGU6ICJSIE5vdGVib29rIg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQogIHBkZl9kb2N1bWVudDogZGVmYXVsdA0KLS0tDQojIyBQcm9qZWN0IDE6IEFDUyBRdWFsaXR5IG9mIExpZmUgcGVyIFN0YXRlLCBFZHVjYXRpb24gYW5kIEdlbmRlcg0KDQpXZSB3YW50IHRvIGZpbmQgYSByZWxhdGlvbnNoaXAgYmV0d2VlZW4gdGhlIEF0dGFpbmVkIEVkdWNhdGlvbiwgU3RhdGUsIEdlbmRlciwgYW5kIHRoZSBxdWFsaXR5IG9mIGxpZmUgb2YgVVMgcG9wdWxhdGlvbi4NCg0KVGhlIG1haW4gYXNzdW1wdGlvbiBpcyB0aGF0IHF1YWxpdHkgb2YgbGlmZSBpcyBkaXJlY3RseSBwcm9wb3J0aW9uYWwgdG8gd2l0aCBTYWxhcnkgW1VTRC9ob3VyXSBhbmQgQ29tbXV0ZSBUaW1lIHRvIHdvcmsgW2hvdXJzXS4NCg0KDQojIyBMb2FkIHRoZSBsaWJyYXJpZXMgYW5kIGRhdGENCg0KYGBge3IsIG1lc3NhZ2U9Rn0NCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShEVCkNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShSQ29sb3JCcmV3ZXIpDQpsaWJyYXJ5KGQzaGVhdG1hcCkNCg0KbG9hZC5kYXRhIDwtIEZBTFNFDQpgYGANCg0KYGBge3IsIGluY2x1ZGU9Rn0NCmlmKGxvYWQuZGF0YSl7DQogIGFjczE0cHVzYSA8LSByZWFkX2NzdigiLi9kYXRhL3NzMTRwdXNhLmNzdiIsIGd1ZXNzX21heCA9IDEwMDAwKQ0KICBhY3MxNHB1c2IgPC0gcmVhZF9jc3YoIi4vZGF0YS9zczE0cHVzYi5jc3YiLCBndWVzc19tYXggPSAxMDAwMCkNCg0KfSBlbHNlIHsNCiAgYWNzMTRwdXMgPC0gcmVhZF9jc3YoZmlsZSA9ICIuL291dHB1dC9zczE0cHVzX2NvbHVtbnMuY3N2IiApDQp9DQoNCg0KYGBgDQoNCmBgYHtyfQ0KaWYobG9hZC5kYXRhKXsNCiAgZGF0YXRhYmxlKGhlYWQoYWNzMTRwdXNhLDUwKSwgb3B0aW9ucyA9IGxpc3Qoc2Nyb2xsWD1ULCBwYWdlTGVuZ3RoID0gMTApKQ0KfQ0KYGBgDQoNCiMjIEJhc2ljIGluZm9ybWF0aW9uIGFib3V0IG9yaWdpbmFsIGRhdGENCmBgYHtyLG1lc3NhZ2U9Rn0NCmlmKGxvYWQuZGF0YSl7DQogIGRpbShhY3MxNHB1c2EpDQogIGRpbShhY3MxNHB1c2IpDQp9DQpgYGANCg0KIyMgU2VsZWN0IHJlbGV2YW50IGNvbHVtbnMgdG8gcmVkdWNlIG1lbW9yeSB1c2FnZS4NCmBgYHtyLG1lc3NhZ2U9Rn0NCmlmKGxvYWQuZGF0YSl7DQogIHJlbGV2YW50LmNvbHVtbnMgPC0gYygiU0VSSUFMTk8iLCAiU1QiLCAiU0VYIiwgIkFHRVAiLCAiU0NITCIsICJJTkRQIiwgIldLSFAiLCAiV0FHUCIsICJFU1IiLA0KICAgICAgICAgICAgICAgICAgICAgICAgIlBJTkNQIiwgIlBFUk5QIiwgIkpXTU5QIikNCiAgYWNzMTRwdXNhLmNvbHMgPC0gYWNzMTRwdXNhWyxjb2xuYW1lcyhhY3MxNHB1c2EpJWluJXJlbGV2YW50LmNvbHVtbnNdDQogIGFjczE0cHVzYi5jb2xzIDwtIGFjczE0cHVzYlssY29sbmFtZXMoYWNzMTRwdXNiKSVpbiVyZWxldmFudC5jb2x1bW5zXQ0K
ICANCiAgcm0oYWNzMTRwdXNhLCBhY3MxNHB1c2IpDQogIGdjKCkNCn0NCmBgYA0KDQojIyBCaW5kIHBhcnQgYSBhbmQgYiwgYW5kIGdldCBiYXNpYyBpbmZvcm1hdGlvbg0KYGBge3IsbWVzc2FnZT1GfQ0KaWYobG9hZC5kYXRhKXsNCiAgYWNzMTRwdXMgPC0gcmJpbmQoYWNzMTRwdXNhLmNvbHMsIGFjczE0cHVzYi5jb2xzKQ0KICBkaW0oYWNzMTRwdXMpDQp9DQoNCmBgYA0KDQojIyBBZGQgc3RhdGUgbmFtZXMgYW5kIGFiYnJldmlhdGlvbnMNCkNyZWRpdHMgdG8gQXJub2xkIENodWEgTGF1IChTcHJpbmcgMjAxNikuDQoNCmBgYHtyLCBtZXNzYWdlPUZ9DQppZihsb2FkLmRhdGEpeyAgDQogIFNULmFubm8gPSByZWFkX2NzdigiLi9kYXRhL3N0YXRlbmFtZXMuY3N2IikNCiAgU1QuYW5ubyA9IG11dGF0ZShTVC5hbm5vLCBTVGFiYnI9YWJiciwgU1RuYW1lPW5hbWUpDQogIA0KICBhY3MxNHB1cyA9IG11dGF0ZShhY3MxNHB1cywgU1RudW0gPSBhcy5udW1lcmljKFNUKSkNCiAgYWNzMTRwdXMgPC0gbGVmdF9qb2luKGFjczE0cHVzLCBTVC5hbm5vLCBieSA9IGMoIlNUbnVtIiA9ICJjb2RlIikpDQogIA0KICBzZWxlY3Qoc2FtcGxlX24oYWNzMTRwdXMsNSksIHN0YXJ0c193aXRoKCJTVCIpKQ0KfQ0KYGBgDQoNCiMjIENvbnZlcnQgZGF0YQ0KDQpgYGB7ciwgbWVzc2FnZT1GfQ0KDQphY3MxNHB1cyRKV01OUCA8LSBhcy5udW1lcmljKGFjczE0cHVzJEpXTU5QKQ0KYWNzMTRwdXMkV0FHUCA8LSBhcy5udW1lcmljKGFjczE0cHVzJFdBR1ApDQphY3MxNHB1cyRXS0hQIDwtIGFzLm51bWVyaWMoYWNzMTRwdXMkV0tIUCkNCmFjczE0cHVzJFNUYWJiciA8LSBhcy5mYWN0b3IoYWNzMTRwdXMkU1RhYmJyKQ0KYWNzMTRwdXMkU0NITCA8LSBhcy5pbnRlZ2VyKGFjczE0cHVzJFNDSEwpDQoNCmFjczE0cHVzJFdBR0VIT1VSIDwtIGFjczE0cHVzJFdBR1AgLyBhY3MxNHB1cyRXS0hQIC8gNTINCg0KDQppbmR1c3RyeS5jYXRlZ29yaWVzID0gcmVhZF9jc3YoIi4vZGF0YS9pbmR1c3RyeV9jb2Rlcy5jc3YiKQ0KDQplZHVjYXRpb24uY2F0ZWdvcmllcyA9IHJlYWRfY3N2KCIuL2RhdGEvZWR1Y2F0aW9uX2NvZGVzLmNzdiIpDQoNCmFjczE0cHVzJFNDSEwgPC0gYXMuaW50ZWdlcihhY3MxNHB1cyRTQ0hMKQ0KYWNzMTRwdXMgPC0gbGVmdF9qb2luKGFjczE0cHVzLCBlZHVjYXRpb24uY2F0ZWdvcmllcywgYnkgPSBjKCJTQ0hMIiA9ICJjb2RlIikpDQoNCg0KYGBgDQoNCg0KIyMgV3JpdGUgcmVzdWx0IHRvIGEgY3N2IGZpbGUuIFNvIHdlIGRvIG5vdCBuZWVkIHRvIGJ1aWxkIHRoZW0gYWdhaW4gKHRha2VzIDMwIG1pbiBpbiBhIGxhcHRvcCkNCmBgYHtyLG1lc3NhZ2U9Rn0NCmlmKGxvYWQuZGF0YSl7DQogIHdyaXRlX2Nzdih4ID0gYWNzMTRwdXMsIHBhdGggPSAiLi9vdXRwdXQvc3MxNHB1c19jb2x1bW5zLmNzdiIgKQ0KfQ0KYGBgDQoNCg0KIyBTdW1tYXJ5IHN0YXRpc3RpY3MgKEphaW1lKQ0KDQojIFN1bW1hcnkgU3RhdGlzdGljcw0KYGBge3IsIG1lc3Nh
Z2U9RkFMU0V9DQpzdW1tYXJ5Lm1lYW4gPC0gYWNzMTRwdXMgJT4lIGdyb3VwX2J5KFNUYWJicikgJT4lIHN1bW1hcmlzZShtZWFuKG5hLm9taXQoSldNTlApKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKG5hLm9taXQoV0FHUCkpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4obmEub21pdChXS0hQKSksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbihuYS5vbWl0KFdBR0VIT1VSKSkNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApDQoNCnN1bW1hcnkubWVhblssLTFdIDwtIHJvdW5kKHN1bW1hcnkubWVhblssLTFdLDEpDQpuYW1lcyhzdW1tYXJ5Lm1lYW4pIDwtIGMoIlNUYWJiciIsICJKV01OUCIsICJXQUdQIiwgIldLSFAiLCAiV0FHRUhPVVIiKQ0KDQpkYXRhdGFibGUoc3VtbWFyeS5tZWFuLCBvcHRpb25zID0gbGlzdChzY3JvbGxYPVQsIHBhZ2VMZW5ndGggPSBsZW5ndGgoc3VtbWFyeS5tZWFuJFNUYWJicikpKQ0KDQpgYGANCg0KDQojIEJveFBsb3QgLSBDb21tdXRlIFRpbWUNCmBgYHtyLCBtZXNzYWdlPUZBTFNFLCBlY2hvPUZBTFNFfQ0KDQpnYygpDQoNCnN1bW1hcnkubWVhbiA8LSBzdW1tYXJ5Lm1lYW5bb3JkZXIoc3VtbWFyeS5tZWFuJEpXTU5QKSxdDQphY3MxNHB1cyRTVGFiYnIgPC0gZmFjdG9yKGFjczE0cHVzJFNUYWJiciwgbGV2ZWxzID0gc3VtbWFyeS5tZWFuJFNUYWJicikNCg0KcGxvdF9seSh4ID0gYWNzMTRwdXMkU1RhYmJyICwgeSA9IGFjczE0cHVzJEpXTU5QICwgdHlwZSA9ICJib3giKSAlPiUNCiAgbGF5b3V0KHRpdGxlID0gIkNvbW11dGUgdGltZSBwZXIgU3RhdGUiLA0KICAgICAgICAgeGF4aXMgPSBsaXN0KHRpdGxlID0iU3RhdGUiKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJDb21tdXRlIHRpbWUgKG1pbnV0ZXMpIikNCiAgICAgICAgICkNCg0KDQoNCmBgYA0KDQojIEJveFBsb3QgLSBXYWdlIHBlciBIb3VyIChKYWltZSkNCiMgSkc6IEkgYW0gbm90IGNvbnZpbmNlZCBvZiB0aGVzZSBkYXRhIHZhbHVlcy4gREMgaXMgZmFyIHRvbyBoaWdoIQ0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgZWNobz1GQUxTRX0NCnBsb3RfbHkoeCA9IGFjczE0cHVzJFNUYWJiciAsIHkgPSBhY3MxNHB1cyRXQUdQICwgdHlwZSA9ICJib3giKSAlPiUNCiAgbGF5b3V0KHRpdGxlID0gIjEyLU1vbnRoIFdhZ2UgcGVyIFN0YXRlIiwNCiAgICAgICAgIHNjZW5lID0gbGlzdCgNCiAgICAgICAgICAgeGF4aXMgPSBsaXN0KHRpdGxlID0iU3RhdGUiKSwNCiAgICAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gIldhZ2UiKQ0KICAgICAgICAgKSkNCg0KYGBgDQoNCg0KIyMgSGVhdG1hcA0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRX0NCnN1bW1hcnkubWVkaWFuLjIgPC0g
YWNzMTRwdXMgJT4lIGdyb3VwX2J5KFNDSEwsIElORFApICU+JSBzdW1tYXJpc2UobWVkaWFuKG5hLm9taXQoSldNTlApKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWRpYW4obmEub21pdChXQUdQKSksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVkaWFuKG5hLm9taXQoV0tIUCkpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lZGlhbihuYS5vbWl0KFdBR0VIT1VSKSkNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApDQpzdW1tYXJ5Lm1lZGlhbi4yIDwtIG5hLm9taXQoc3VtbWFyeS5tZWRpYW4uMikNCg0Kc3VtbWFyeS5tZWRpYW4uMlssLWMoMSwyKV0gPC0gcm91bmQoc3VtbWFyeS5tZWRpYW4uMlssLWMoMSwyKV0sMSkNCm5hbWVzKHN1bW1hcnkubWVkaWFuLjIpIDwtIGMoIlNDSEwiLCAiSU5EUCIsICJKV01OUCIsICJXQUdQIiwgIldLSFAiLCAiV0FHRUhPVVIiKQ0Kc3VtbWFyeS5tZWRpYW4uMiRTQ0hMIDwtIGFzLmZhY3RvcihzdW1tYXJ5Lm1lZGlhbi4yJFNDSEwpDQpzdW1tYXJ5Lm1lZGlhbi4yJElORFAgPC0gYXMuaW50ZWdlcihzdW1tYXJ5Lm1lZGlhbi4yJElORFApDQpzdW1tYXJ5Lm1lZGlhbi4yJFNDSEwgPC0gYXMuaW50ZWdlcihzdW1tYXJ5Lm1lZGlhbi4yJFNDSEwpDQoNCg0Kc3VtbWFyeS5tZWRpYW4uMiA8LSBuYS5vbWl0KHN1bW1hcnkubWVkaWFuLjIpDQoNCnN1bW1hcnkubWVkaWFuLjIgPC0gbGVmdF9qb2luKHN1bW1hcnkubWVkaWFuLjIsIGluZHVzdHJ5LmNhdGVnb3JpZXMsIGJ5ID0gYygiSU5EUCIgPSAiY29kZSIpKQ0KDQpzdW1tYXJ5Lm1lZGlhbi4yIDwtIGxlZnRfam9pbihzdW1tYXJ5Lm1lZGlhbi4yLCBlZHVjYXRpb24uY2F0ZWdvcmllcywgYnkgPSBjKCJTQ0hMIiA9ICJjb2RlIikpDQpgYGANCg0KIyMgQWZ0ZXIgVVMgcG9wdWxhdGlvbiBnZXRzIGEgYmFjaGVybG9yJ3MgZGVncmVlLCB0aGVyZSBpcyBhIHRlbmRlbmN5IHRlbmQgdG8gY29tbXV0ZSBtb3JlIHRoYW4gaWYgeW91IGRpZCBub3QgaGF2ZSB0aGUgaGlnaGVyIGxldmVsIGRlZ3JlZSANCmBgYHtyfQ0KDQphY3MxNHB1cyA8LSBhY3MxNHB1c1tvcmRlcihhY3MxNHB1cyRTQ0hMKSxdDQoNCnBsb3RfbHkoeCA9IGFjczE0cHVzJGVkdWNhdGlvbiwNCiAgICAgICAgeSA9IGFjczE0cHVzJEpXTU5QLA0KICAgICAgICB0eXBlID0gImJveCIsDQogICAgICAgIHNvcnQgPSBGQUxTRSkgJT4lDQogIGxheW91dCh0aXRsZSA9ICJDb21tdXRlIHRpbWUgYnkgRWR1Y2F0aW9uIEF0dGFpbm1lbnQiLA0KICAgICAgICAgeGF4aXMgPSBsaXN0KHRpdGxlID0iRWR1Y2F0aW9uIiksDQogICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiQ29tbXV0ZSB0aW1lIiksDQogICAgICAgICMgd2lkdGggPSAxMDAwLA0KICAgICAgICAjIGhl
aWdodCA9IDcwMCwNCiAgICAgICAgIGxlZ2VuZCA9IGVkdWNhdGlvbi5jYXRlZ29yaWVzJGVkdWNhdGlvbg0KICAgICAgICAgKQ0KDQoNCmBgYA0KDQpgYGB7cn0NCnBsb3RfbHkoeiA9IHN1bW1hcnkubWVkaWFuLjIkSldNTlAsIA0KICAgICAgICB4ID0gc3VtbWFyeS5tZWRpYW4uMiRlZHVjYXRpb24sDQogICAgICAgIHkgPSBzdW1tYXJ5Lm1lZGlhbi4yJGluZHVzdHJ5LA0KICAgICAgICB0eXBlID0gImhlYXRtYXAiLA0KICAgICAgICBjb2xvcnNjYWxlID0gIkhvdCIpICU+JQ0KICBsYXlvdXQodGl0bGUgPSAiQ29tbXV0ZSB0aW1lIGJ5IEVkdWNhdGlvbiBBdHRhaW5tZW50IGFuZCBJbmR1c3RyeSIsDQogICAgICAgICB4YXhpcyA9IGxpc3QodGl0bGUgPSJFZHVjYXRpb24iKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJJbmR1c3RyeSIpLA0KICAgICAgICAgd2lkdGggPSAxMDAwLA0KICAgICAgICAgaGVpZ2h0ID0gNzAwDQogICAgICAgICApDQoNCg0KYGBgDQoNCg0KIyMgQ2hvcm9wbGV0aCBtYXBzIG9mIFVTIHN0YXRlcyAoWWluZykNCg0KDQo=